{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Feature Ranking"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "import pandas as pd\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# yfinance is used to fetch the price history from Yahoo Finance\n",
        "import yfinance as yf\n",
        "\n",
        "# pdr_override() only patches pandas_datareader, which this notebook never\n",
        "# uses (data is fetched with yf.download directly). The hook may be missing\n",
        "# in newer yfinance releases, so call it only when it is available.\n",
        "if hasattr(yf, \"pdr_override\"):\n",
        "    yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:05.921Z",
          "iopub.execute_input": "2020-07-01T01:06:05.927Z",
          "iopub.status.idle": "2020-07-01T01:06:07.121Z",
          "shell.execute_reply": "2020-07-01T01:06:07.153Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Inputs: ticker symbol and date range\n",
        "symbol = 'AMD'\n",
        "start, end = '2014-01-01', '2019-01-01'\n",
        "\n",
        "# Download the price history for the chosen range\n",
        "dataset = yf.download(symbol, start=start, end=end)\n",
        "\n",
        "# Preview the first rows and the available columns\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 completed\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-02</th>\n      <td>3.95</td>\n      <td>3.95</td>\n      <td>3.98</td>\n      <td>3.84</td>\n      <td>3.85</td>\n      <td>20548400</td>\n    </tr>\n    <tr>\n      <th>2014-01-03</th>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>3.98</td>\n      <td>22887200</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.01</td>\n      <td>42398300</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.19</td>\n      <td>42932100</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.26</td>\n      <td>4.14</td>\n      <td>4.23</td>\n      <td>30678700</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "            Adj Close  Close  High   Low  Open    Volume\nDate                                                    \n2014-01-02       3.95   3.95  3.98  3.84  3.85  20548400\n2014-01-03       4.00   4.00  4.00  3.88  3.98  22887200\n2014-01-06       4.13   4.13  4.18  3.99  4.01  42398300\n2014-01-07       4.18   4.18  4.25  4.11  4.19  42932100\n2014-01-08       4.18   4.18  4.26  4.14  4.23  30678700"
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:07.134Z",
          "iopub.execute_input": "2020-07-01T01:06:07.141Z",
          "iopub.status.idle": "2020-07-01T01:06:08.490Z",
          "shell.execute_reply": "2020-07-01T01:06:08.509Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "def _next_is_higher(series):\n",
        "    \"\"\"Return 1 where the next row's value exceeds the current one, else 0.\"\"\"\n",
        "    return np.where(series.shift(-1) > series, 1, 0)\n",
        "\n",
        "\n",
        "# Intraday ratios\n",
        "dataset['Open_Close'] = (dataset['Open'] - dataset['Adj Close']) / dataset['Open']\n",
        "dataset['High_Low'] = (dataset['High'] - dataset['Low']) / dataset['Low']\n",
        "\n",
        "# Next-row direction flags. The final row has no next row, so its comparison\n",
        "# against NaN is False and the flag comes out as 0.\n",
        "dataset['Increase_Decrease'] = _next_is_higher(dataset['Volume'])\n",
        "dataset['Buy_Sell_on_Open'] = _next_is_higher(dataset['Open'])\n",
        "dataset['Buy_Sell'] = _next_is_higher(dataset['Adj Close'])\n",
        "\n",
        "# Row-over-row return; the first row is NaN and is removed by dropna()\n",
        "dataset['Returns'] = dataset['Adj Close'].pct_change()\n",
        "dataset = dataset.dropna()\n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>Adj Close</th>\n      <th>Close</th>\n      <th>High</th>\n      <th>Low</th>\n      <th>Open</th>\n      <th>Volume</th>\n      <th>Open_Close</th>\n      <th>High_Low</th>\n      <th>Increase_Decrease</th>\n      <th>Buy_Sell_on_Open</th>\n      <th>Buy_Sell</th>\n      <th>Returns</th>\n    </tr>\n    <tr>\n      <th>Date</th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n      <th></th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>2014-01-03</th>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>4.00</td>\n      <td>3.88</td>\n      <td>3.98</td>\n      <td>22887200</td>\n      <td>-0.005025</td>\n      <td>0.030928</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.012658</td>\n    </tr>\n    <tr>\n      <th>2014-01-06</th>\n      <td>4.13</td>\n      <td>4.13</td>\n      <td>4.18</td>\n      <td>3.99</td>\n      <td>4.01</td>\n      <td>42398300</td>\n      <td>-0.029925</td>\n      <td>0.047619</td>\n      <td>1</td>\n      <td>1</td>\n      <td>1</td>\n      <td>0.032500</td>\n    </tr>\n    <tr>\n      <th>2014-01-07</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.25</td>\n      <td>4.11</td>\n      <td>4.19</td>\n      <td>42932100</td>\n      <td>0.002387</td>\n      <td>0.034063</td>\n      <td>0</td>\n      <td>1</td>\n      <td>0</td>\n      <td>0.012106</td>\n    </tr>\n    <tr>\n      <th>2014-01-08</th>\n      <td>4.18</td>\n      <td>4.18</td>\n      <td>4.26</td>\n      
<td>4.14</td>\n      <td>4.23</td>\n      <td>30678700</td>\n      <td>0.011820</td>\n      <td>0.028986</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0</td>\n      <td>0.000000</td>\n    </tr>\n    <tr>\n      <th>2014-01-09</th>\n      <td>4.09</td>\n      <td>4.09</td>\n      <td>4.23</td>\n      <td>4.05</td>\n      <td>4.20</td>\n      <td>30667600</td>\n      <td>0.026190</td>\n      <td>0.044444</td>\n      <td>0</td>\n      <td>0</td>\n      <td>1</td>\n      <td>-0.021531</td>\n    </tr>\n  </tbody>\n</table>\n</div>",
            "text/plain": "            Adj Close  Close  High   Low  Open    Volume  Open_Close  \\\nDate                                                                   \n2014-01-03       4.00   4.00  4.00  3.88  3.98  22887200   -0.005025   \n2014-01-06       4.13   4.13  4.18  3.99  4.01  42398300   -0.029925   \n2014-01-07       4.18   4.18  4.25  4.11  4.19  42932100    0.002387   \n2014-01-08       4.18   4.18  4.26  4.14  4.23  30678700    0.011820   \n2014-01-09       4.09   4.09  4.23  4.05  4.20  30667600    0.026190   \n\n            High_Low  Increase_Decrease  Buy_Sell_on_Open  Buy_Sell   Returns  \nDate                                                                           \n2014-01-03  0.030928                  1                 1         1  0.012658  \n2014-01-06  0.047619                  1                 1         1  0.032500  \n2014-01-07  0.034063                  0                 1         0  0.012106  \n2014-01-08  0.028986                  0                 0         0  0.000000  \n2014-01-09  0.044444                  0                 0         1 -0.021531  "
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:08.515Z",
          "iopub.execute_input": "2020-07-01T01:06:08.523Z",
          "shell.execute_reply": "2020-07-01T01:06:08.698Z",
          "iopub.status.idle": "2020-07-01T01:06:08.559Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Features: every column except the adjusted close and the target label\n",
        "X = dataset.drop(columns=['Adj Close', 'Buy_Sell'])\n",
        "\n",
        "# Target: the Buy_Sell flag as a NumPy array\n",
        "Y = dataset['Buy_Sell'].values"
      ],
      "outputs": [],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:08.569Z",
          "iopub.execute_input": "2020-07-01T01:06:08.576Z",
          "iopub.status.idle": "2020-07-01T01:06:08.586Z",
          "shell.execute_reply": "2020-07-01T01:06:09.475Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Show the shapes of the feature matrix and the target, one per line\n",
        "for arr in (X, Y):\n",
        "    print(arr.shape)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "(1257, 10)\n",
            "(1257,)\n"
          ]
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:08.596Z",
          "iopub.execute_input": "2020-07-01T01:06:08.602Z",
          "iopub.status.idle": "2020-07-01T01:06:08.615Z",
          "shell.execute_reply": "2020-07-01T01:06:09.478Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# Convert to plain NumPy arrays. The row count is taken from the data rather\n",
        "# than hard-coded, so the cell keeps working for other symbols or date ranges.\n",
        "X = np.asarray(X).reshape(len(X), -1)\n",
        "\n",
        "# scikit-learn estimators expect a 1-D target of shape (n_samples,); a column\n",
        "# vector only works through an implicit conversion (with a warning).\n",
        "y = np.asarray(Y).ravel()"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:08.627Z",
          "iopub.execute_input": "2020-07-01T01:06:08.636Z",
          "iopub.status.idle": "2020-07-01T01:06:08.647Z",
          "shell.execute_reply": "2020-07-01T01:06:09.480Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.svm import SVR\n",
        "from sklearn.feature_selection import RFECV\n",
        "\n",
        "# Recursive feature elimination with 5-fold cross-validation around a linear SVR\n",
        "svr_mod = SVR(kernel=\"linear\")\n",
        "feat_selector = RFECV(svr_mod, cv=5)\n",
        "\n",
        "# Fit\n",
        "feat_selector = feat_selector.fit(X, y)\n",
        "\n",
        "# X may be a bare NumPy array at this point (no .columns attribute), so the\n",
        "# feature names are taken from the DataFrame the features were built from.\n",
        "feature_names = dataset.drop(['Adj Close', 'Buy_Sell'], axis=1).columns\n",
        "\n",
        "# Show support and ranking next to the feature each value belongs to\n",
        "pd.DataFrame({'support': feat_selector.support_,\n",
        "              'ranking': feat_selector.ranking_},\n",
        "             index=feature_names, columns=['support', 'ranking'])"
      ],
      "outputs": [],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:08.666Z",
          "iopub.execute_input": "2020-07-01T01:06:08.674Z",
          "iopub.status.idle": "2020-07-01T01:06:09.447Z",
          "shell.execute_reply": "2020-07-01T01:06:09.484Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.linear_model import LarsCV\n",
        "\n",
        "# Instantiate\n",
        "# NOTE(review): `normalize` is deprecated/removed in newer scikit-learn\n",
        "# releases; drop the argument if LarsCV rejects it -- confirm against the\n",
        "# installed version.\n",
        "lars_model = LarsCV(cv=5, normalize=False)\n",
        "\n",
        "# Fit in place. The result is deliberately not assigned to `feat_selector`,\n",
        "# so that name keeps referring to the fitted RFECV selector.\n",
        "lars_model.fit(X, y)\n",
        "\n",
        "# Print r-squared score and estimated alpha\n",
        "print(lars_model.score(X, y))\n",
        "print(lars_model.alpha_)"
      ],
      "outputs": [],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:09.456Z",
          "iopub.execute_input": "2020-07-01T01:06:09.462Z",
          "iopub.status.idle": "2020-07-01T01:06:09.528Z",
          "shell.execute_reply": "2020-07-01T01:06:09.605Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.ensemble import RandomForestRegressor\n",
        "\n",
        "# Instantiate\n",
        "rf_model = RandomForestRegressor(max_depth=2, random_state=123,\n",
        "                                 n_estimators=100, oob_score=True)\n",
        "\n",
        "# Fit\n",
        "rf_model.fit(X, y)\n",
        "\n",
        "# dataset.columns also lists 'Adj Close' and the 'Buy_Sell' target, which are\n",
        "# not features, so the importances are labelled with the feature columns only.\n",
        "feature_names = dataset.drop(['Adj Close', 'Buy_Sell'], axis=1).columns\n",
        "pd.Series(rf_model.feature_importances_, index=feature_names).sort_values(ascending=False)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 9,
          "data": {
            "text/plain": "(1257, 2)"
          },
          "metadata": {}
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:09.540Z",
          "iopub.execute_input": "2020-07-01T01:06:09.547Z",
          "iopub.status.idle": "2020-07-01T01:06:09.561Z",
          "shell.execute_reply": "2020-07-01T01:06:09.609Z"
        }
      }
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn.ensemble import ExtraTreesRegressor\n",
        "\n",
        "# Instantiate with a fixed seed so the importances are reproducible\n",
        "ETR_model = ExtraTreesRegressor(random_state=123)\n",
        "\n",
        "# Fit\n",
        "ETR_model.fit(X, y)\n",
        "\n",
        "# Read the importances from the fitted model (`ETR_model`; the previously\n",
        "# referenced `ETR_model_mod` is never defined) and label them with the feature\n",
        "# columns only: dataset.columns also contains 'Adj Close' and the target.\n",
        "feature_names = dataset.drop(['Adj Close', 'Buy_Sell'], axis=1).columns\n",
        "pd.Series(ETR_model.feature_importances_, index=feature_names).sort_values(ascending=False)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 10,
          "data": {
            "text/plain": "array([[-1.44899117,  0.76442088],\n       [-1.13188616,  1.61698256],\n       [-1.13629614,  0.62013725],\n       ...,\n       [ 3.40484299,  0.20020157],\n       [ 3.47729094,  1.02990627],\n       [ 3.20984775,  0.1234806 ]])"
          },
          "metadata": {}
        }
      ],
      "execution_count": 10,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false,
        "execution": {
          "iopub.status.busy": "2020-07-01T01:06:09.570Z",
          "iopub.execute_input": "2020-07-01T01:06:09.578Z",
          "iopub.status.idle": "2020-07-01T01:06:09.591Z",
          "shell.execute_reply": "2020-07-01T01:06:09.613Z"
        }
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "name": "python",
      "codemirror_mode": {
        "version": 3,
        "name": "ipython"
      },
      "nbconvert_exporter": "python",
      "version": "3.5.5",
      "file_extension": ".py",
      "mimetype": "text/x-python",
      "pygments_lexer": "ipython3"
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.24.0"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}